{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"A very simple MNIST classifier.\n",
    "See extensive documentation at https://www.tensorflow.org/get_started/mnist/beginners\n",
    "\"\"\"\n",
    "\n",
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "from tensorflow.examples.tutorials.mnist import input_data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "我们在这里调用系统提供的 MNIST 数据函数为我们读入数据，如果没有下载的话则进行下载。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From <ipython-input-2-698ada706af1>:3: read_data_sets (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use alternatives such as official/mnist/dataset.py from tensorflow/models.\n",
      "WARNING:tensorflow:From D:\\Anaconda3\\lib\\site-packages\\tensorflow\\contrib\\learn\\python\\learn\\datasets\\mnist.py:260: maybe_download (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please write your own downloading logic.\n",
      "WARNING:tensorflow:From D:\\Anaconda3\\lib\\site-packages\\tensorflow\\contrib\\learn\\python\\learn\\datasets\\mnist.py:262: extract_images (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use tf.data to implement this functionality.\n",
      "Extracting /tmp/tensorflow/mnist/input_data\\train-images-idx3-ubyte.gz\n",
      "WARNING:tensorflow:From D:\\Anaconda3\\lib\\site-packages\\tensorflow\\contrib\\learn\\python\\learn\\datasets\\mnist.py:267: extract_labels (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use tf.data to implement this functionality.\n",
      "Extracting /tmp/tensorflow/mnist/input_data\\train-labels-idx1-ubyte.gz\n",
      "WARNING:tensorflow:From D:\\Anaconda3\\lib\\site-packages\\tensorflow\\contrib\\learn\\python\\learn\\datasets\\mnist.py:110: dense_to_one_hot (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use tf.one_hot on tensors.\n",
      "Extracting /tmp/tensorflow/mnist/input_data\\t10k-images-idx3-ubyte.gz\n",
      "Extracting /tmp/tensorflow/mnist/input_data\\t10k-labels-idx1-ubyte.gz\n",
      "WARNING:tensorflow:From D:\\Anaconda3\\lib\\site-packages\\tensorflow\\contrib\\learn\\python\\learn\\datasets\\mnist.py:290: DataSet.__init__ (from tensorflow.contrib.learn.python.learn.datasets.mnist) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use alternatives such as official/mnist/dataset.py from tensorflow/models.\n"
     ]
    }
   ],
   "source": [
    "# Import data\n",
    "data_dir = '/tmp/tensorflow/mnist/input_data'\n",
    "mnist = input_data.read_data_sets(data_dir, one_hot=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "一个非常非常简陋的模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From D:\\Anaconda3\\lib\\site-packages\\tensorflow\\python\\framework\\op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Colocations handled automatically by placer.\n"
     ]
    }
   ],
   "source": [
    "# Create the model\n",
    "in_units=784\n",
    "h1_units=1500\n",
    "h2_units=1000\n",
    "#h3_units=800\n",
    "x1 = tf.placeholder(tf.float32, [None, in_units])\n",
    "#W1 = tf.Variable(tf.truncated_normal([in_units, h1_units],stddev=1.0))\n",
    "W1 = tf.Variable(np.random.randn(in_units, h1_units).astype('float32'))/np.sqrt(in_units)\n",
    "#b1 = tf.Variable(tf.zeros([h1_units]))\n",
    "b1 = tf.Variable(tf.constant(0.1,shape=[h1_units]))\n",
    "#W2 = tf.Variable(tf.truncated_normal([h1_units,10],stddev=1.0))\n",
    "W2 = tf.Variable(np.random.randn(h1_units, 10).astype('float32'))/np.sqrt(h1_units)\n",
    "#b2 = tf.Variable(tf.zeros([10]))\n",
    "b2 = tf.Variable(tf.constant(0.1,shape=[10]))\n",
    "#W3 = tf.Variable(tf.truncated_normal([h2_units,10],stddev=1.0))\n",
    "#b3 = tf.Variable(tf.zeros([10]))\n",
    "#W4 = tf.Variable(tf.truncated_normal([h3_units,10],stddev=1.0))\n",
    "#b4 = tf.Variable(tf.truncated_normal([10],stddev=1.0))\n",
    "hidden1=tf.nn.relu(tf.matmul(x1,W1)+b1)\n",
    "#hidden2=tf.nn.relu(tf.matmul(hidden1,W2)+b2)\n",
    "#hidden3=tf.nn.relu(tf.matmul(hidden2,W3)+b3)\n",
    "y = tf.matmul(hidden1, W2) + b2\n",
    "\n",
    "regularizer=tf.contrib.layers.l2_regularizer(0.0001)\n",
    "regularization=regularizer(W1)+regularizer(W2)#+regularizer(W3)#+regularizer(W4)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "定义我们的 ground truth 占位符"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define loss and optimizer\n",
    "y_ = tf.placeholder(tf.float32, [None, 10])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "接下来我们计算交叉熵，注意这里不要使用注释中的手动计算方式，而是使用系统函数。\n",
    "另一个注意点就是，softmax_cross_entropy_with_logits 的 logits 参数是**未经激活的 wx+b**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From <ipython-input-5-3ff57236d612>:11: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "\n",
      "Future major versions of TensorFlow will allow gradients to flow\n",
      "into the labels input on backprop by default.\n",
      "\n",
      "See `tf.nn.softmax_cross_entropy_with_logits_v2`.\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# The raw formulation of cross-entropy,\n",
    "#\n",
    "#   tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.nn.softmax(y)),\n",
    "#                                 reduction_indices=[1]))\n",
    "#\n",
    "# can be numerically unstable.\n",
    "#\n",
    "# So here we use tf.nn.softmax_cross_entropy_with_logits on the raw\n",
    "# outputs of 'y', and then average across the batch.\n",
    "cross_entropy = tf.reduce_mean(\n",
    "    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "生成一个训练step"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "loss=cross_entropy+regularization\n",
    "train_step = tf.train.AdamOptimizer(1e-2).minimize(loss)\n",
    "\n",
    "sess = tf.Session()\n",
    "init_op = tf.global_variables_initializer()\n",
    "sess.run(init_op)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "在这里我们仍然调用系统提供的读取数据，为我们取得一个 batch。\n",
    "然后我们运行 3k 个 step (5 epochs)，对权重进行优化。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.2256\n",
      "0.9068\n",
      "0.9209\n",
      "0.931\n",
      "0.9336\n",
      "0.9373\n",
      "0.9484\n",
      "0.9497\n",
      "0.9541\n",
      "0.956\n",
      "0.9568\n",
      "0.9602\n",
      "0.9651\n",
      "0.9665\n",
      "0.9672\n",
      "0.9654\n",
      "0.9672\n",
      "0.9701\n",
      "0.9698\n",
      "0.9724\n",
      "0.9708\n",
      "0.9696\n",
      "0.9736\n",
      "0.9737\n",
      "0.9744\n",
      "0.9754\n",
      "0.9752\n",
      "0.9733\n",
      "0.976\n",
      "0.9769\n"
     ]
    }
   ],
   "source": [
    "# Train\n",
    "for i in range(3000):\n",
    "    batch_xs, batch_ys = mnist.train.next_batch(100)\n",
    "    sess.run(train_step, feed_dict={x1: batch_xs, y_: batch_ys})\n",
    "    if i%100==0:\n",
    "        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))\n",
    "        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n",
    "        print(sess.run(accuracy, feed_dict={x1: mnist.test.images, y_: mnist.test.labels}))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "验证我们模型在测试数据上的准确率"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Test trained model\n",
    "#correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))\n",
    "#accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n",
    "#print(sess.run(accuracy, feed_dict={x1: mnist.test.images, y_: mnist.test.labels}))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "毫无疑问，这个模型是一个非常简陋，性能也不理想的模型。目前只能达到 92% 左右的准确率。\n",
    "接下来，希望大家利用现有的知识，将这个模型优化至 98% 以上的准确率。\n",
    "Hint：\n",
    "- 多隐层\n",
    "- 激活函数\n",
    "- 正则化\n",
    "- 初始化\n",
    "- 摸索一下各个超参数\n",
    "  - 隐层神经元数量\n",
    "  - 学习率\n",
    "  - 正则化惩罚因子\n",
    "  - 最好每隔几个 step 就对 loss、accuracy 等等进行一次输出，这样才能有根据地进行调整"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
